suppressPackageStartupMessages(library(package = "knitr"))
suppressPackageStartupMessages(library(package = "Biobase"))
suppressPackageStartupMessages(library(package = "RColorBrewer"))
suppressPackageStartupMessages(library(package = "tidyverse"))
suppressPackageStartupMessages(library(package = "ggbeeswarm"))
suppressPackageStartupMessages(library(package = "pvca")) # dleelab/pvca
suppressPackageStartupMessages(library(package = "lme4"))
suppressPackageStartupMessages(library(package = "cluster"))
suppressPackageStartupMessages(library(package = "ComplexHeatmap")) # Bioc Install
suppressPackageStartupMessages(library(package = "ImmuneSignatures2"))
These figures only look at the “Young” cohort that is defined as 18 to 50 years old.
noRespGEFile <- list.files(path = dataCacheDir,
pattern = "young_norm_noResponse_eset.rds",
full.names = TRUE)
esetNoResp <- readRDS(file = noRespGEFile)
withRespGEFile <- list.files(path = dataCacheDir,
pattern = "young_norm_withResponse_eset.rds",
full.names = TRUE)
esetWithResp <- readRDS(file = withRespGEFile)
Authors: Slim Fourati, Joanna Arce
# any sampling data after 20 days coded as >20
df.fig1b <- pData(esetNoResp) %>%
select(uid, study_time_collected, pathogen, vaccine_type) %>%
arrange(study_time_collected) %>%
mutate(study_time_collected = signif(study_time_collected, digits = 3),
study_time_collected.factor = ifelse(test = study_time_collected > 20,
yes = ">20",
no = study_time_collected),
study_time_collected.factor =
factor(study_time_collected.factor,
levels = unique(study_time_collected.factor)),
vaccine = paste0(pathogen, " (", vaccine_type, ")")) %>%
group_by(study_time_collected.factor, vaccine) %>%
summarize(n = n())
ggplot(data = df.fig1b,
mapping = aes(x = study_time_collected.factor,
y = n)) +
geom_bar(stat = "identity", mapping = aes(fill = vaccine)) +
labs(x = "Days post-vaccination", y = "Number of samples") +
scale_y_continuous(limits = c(0, 800),
breaks = seq(from = 0, to = 800, by = 100)) +
scale_fill_manual(name = "Pathogen (Vaccine type)",
values = vaccine2color) +
theme_bw() +
theme(panel.grid = element_blank(),
panel.grid.major.y = element_line(color = "lightgrey"),
axis.text = element_text(color = "black"),
axis.text.x = element_text(angle = 45, hjust = 1),
legend.pos = "bottom",
legend.text = element_text(size = 7),
legend.key.height = unit(0.02, units = "npc"),
legend.key.width = unit(0.015, units = "npc")) +
guides(fill = guide_legend(ncol = 2))
Author: Slim Fourati
df.fig1c <- pData(esetNoResp) %>%
select(participant_id, pathogen, vaccine_type, age_imputed, gender_imputed) %>%
distinct() %>%
mutate(vaccine = paste0(pathogen, " (", vaccine_type, ")"))
# remove vaccine where all the participant have the same age
flag <- df.fig1c %>%
group_by(vaccine) %>%
summarize(nbUniqueAge = length(unique(age_imputed))) %>%
filter(nbUniqueAge > 1)
df.fig1c <- filter(df.fig1c, vaccine %in% flag$vaccine)
ggplot(data = df.fig1c,
mapping = aes(x = vaccine,
y = age_imputed)) +
geom_boxplot(fill = "transparent", outlier.color = "transparent") +
geom_beeswarm(mapping = aes(color = vaccine, shape = gender_imputed),
cex = 0.3) +
scale_color_manual(name = "Pathogen (Vaccine type)",
values = vaccine2color) +
labs(x = "Pathogen (Vaccine type)", y = "Age") +
theme_bw() +
theme(panel.grid = element_blank(),
panel.grid.major.y = element_line(color = "lightgrey"),
axis.text = element_text(color = "black"),
axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
legend.text = element_text(size = 7),
legend.key.height = unit(0.02, units = "npc"),
legend.key.width = unit(0.015, units = "npc")) +
guides(fill = guide_legend(ncol = 1))
# test for difference between vaccine
kruskal.test(formula = age_imputed ~ vaccine, data = df.fig1c)
##
## Kruskal-Wallis rank sum test
##
## data: age_imputed by vaccine
## Kruskal-Wallis chi-squared = 110.1, df = 7, p-value < 2.2e-16
Author: Slim Fourati
em.fig1d <- exprs(esetNoResp)
pd.fig1d <- pData(esetNoResp) %>%
mutate(Vaccine = paste0(pathogen, " (", vaccine_type, ")")) %>%
select(uid, gender_imputed, ethnicity, Vaccine, study_time_collected,
age_imputed) %>%
rename(Gender = gender_imputed,
Ethnicity = ethnicity,
Timepoint = study_time_collected,
Age = age_imputed) %>%
column_to_rownames(var = "uid")
pd.fig1d <- pd.fig1d[colnames(em.fig1d), ]
df.fig1d <- data.frame(explained = as.vector(fit),
effect = names(fit)) %>%
arrange(explained) %>%
mutate(effect = factor(effect, levels = effect))
ggplot(data = df.fig1d,
mapping = aes(x = effect, y = explained)) +
geom_bar(stat = "identity") +
geom_text(aes(label = signif(explained, digits = 3)),
nudge_y = 0.01,
size = 3) +
labs(x = NULL, y = "Proportion of the variance explained") +
theme_bw() +
theme(axis.text.x = element_text(angle = 90,
vjust = 0.5,
hjust = 1))
Author: Slim Fourati
Pre-Processing: Microarray probes may have different affinities for their target mRNA and thus the itensities can’t be compared between probes inside the same sample. A way to correct for that is to scale each probe to a mean of 0 and a standard-deviation of 1.
The following looks only at pre-vaccination timepoints
sleaFile <- file.path(dataCacheDir, "sleaSet.rds")
sleaCached <- file.exists(sleaFile)
if(sleaCached){
sleaSet <- readRDS(sleaFile)
}
batch <- interaction(esetNoResp$study_accession,
esetNoResp$matrix,
drop = TRUE)
scaledMat <- by(data = t(exprs(esetNoResp)),
INDICES = batch,
FUN = function(x) scale(x)) %>%
do.call(what = rbind) %>%
t()
scaledMat <- scaledMat[, sampleNames(esetNoResp)]
# SLEA (Ref: Lopez-Bigas N. et al. (2008) Genome Biol.)
B <- 100
flag <- lapply(all_genesets, FUN = intersect, y = rownames(scaledMat)) %>%
sapply(FUN = length)
zscoreMat <- sapply(all_genesets, FUN = function(gs) {
gs <- intersect(gs, rownames(scaledMat))
ngenes <- length(gs)
mu <- colMeans(scaledMat[gs, , drop = FALSE], na.rm = TRUE)
muPermut <- mclapply(1:B, FUN = function(seed) {
set.seed(seed = seed)
muHat <- colMeans(scaledMat[sample.int(nrow(scaledMat), ngenes), , drop = FALSE],
na.rm = TRUE)
return(value = muHat)
})
muPermut <- do.call(what = rbind, args = muPermut)
zscore <- (mu - colMeans(muPermut, na.rm = TRUE)) /
apply(muPermut, MARGIN = 2, FUN = sd, na.rm = TRUE)
return(value = zscore)
})
sleaSet <- ExpressionSet(assayData = t(zscoreMat),
phenoData = AnnotatedDataFrame(pData(esetNoResp)))
saveRDS(sleaSet, file = sleaFile)
inflamGS <- c("HALLMARK_INFLAMMATORY_RESPONSE",
"HALLMARK_COMPLEMENT",
"HALLMARK_IL6_JAK_STAT3_SIGNALING",
"HALLMARK_TNFA_SIGNALING_VIA_NFKB")
Author: Slim Fourati
zscoreTemp <- exprs(sleaSet)[, sleaSet$study_time_collected <= 0]
# remove genesets with missing symbols in virtual study
zscoreTemp <- zscoreTemp[rowMeans(is.na(zscoreTemp)) < 1, ]
# for representation purpsis only present top 200 varying genesets
varLS <- apply(zscoreTemp, MARGIN = 1, FUN = var)
zscoreTemp <- zscoreTemp[order(varLS, decreasing = TRUE)[1:200], ]
# cutree into three groups (see gap statistic analysis
set.seed(seed = 23)
tree_col <- kmeans(x = t(zscoreTemp), centers = 3)
gr <- tree_col$cluster
# relabel cluster based on inflammatory genesets
df.fig2 <- data.frame(mu = colMeans(zscoreTemp[inflamGS, ])) %>%
rownames_to_column()
df.fig2 <- data.frame(grn = gr) %>%
rownames_to_column() %>%
merge(x = df.fig2) %>%
merge(y = pData(sleaSet),
by.x = "rowname",
by.y = "uid")
df.fig2 %>%
group_by(grn) %>%
summarize(q2 = median(mu)) %>%
ungroup() %>%
mutate(gr = ifelse(q2 %in% min(q2),
yes = "low",
no = NA),
gr = ifelse(q2 %in% max(q2) & is.na(gr),
yes = "high",
no = gr),
gr = ifelse(is.na(gr),
yes = "mid",
no = gr),
gr = factor(gr, levels = c("low", "mid", "high"))) %>%
merge(x = df.fig2,
by = "grn") %>%
column_to_rownames(var = "rowname") -> df.fig2
df.fig2 <- df.fig2[colnames(zscoreTemp), ]
# Remove names of unwanted cells in plot
rowLabels <- rownames(zscoreTemp)
targetCellNames <- "B.cell|E2F|T.cell|MYC| NK| Interferon|STAT|migration|Monocytes| DC|Neutrophiles"
unwantedCells <- !grepl(pattern = targetCellNames,
rownames(zscoreTemp),
ignore.case = TRUE)
rowLabels[ unwantedCells ] <- ""
# generate heatmap annotation
ha <- df.fig2 %>%
rownames_to_column() %>%
mutate(inflam.gs = findInterval(mu,
vec = quantile(mu,
probs = c(0, 0.33, 0.66, 1)),
rightmost.closed = TRUE),
inflam.gs = paste0("tier", inflam.gs)) %>%
select(rowname, inflam.gs) %>%
column_to_rownames() %>%
.[colnames(zscoreTemp), , drop = FALSE] %>%
HeatmapAnnotation(df = .,
col = list(inflam.gs = c(tier1 = "yellow",
tier2 = "orange",
tier3 = "red")))
Heatmap(matrix = zscoreTemp,
row_names_side = "left",
show_column_names = FALSE,
column_split = df.fig2$gr,
show_row_dend = FALSE,
name = "SLEA z-score",
row_labels = rowLabels,
row_names_gp = gpar(fontsize = 8),
top_annotation = ha)
Author: Slim Fourati
clusGapFile <- file.path(dataCacheDir, "clusGap.rds")
if(!file.exists(clusGapFile)){
set.seed(seed = 23)
fit <- clusGap(t(zscoreTemp), kmeans, K.max = 10)
saveRDS(fit, file = clusGapFile)
}else{
fit <- readRDS(clusGapFile)
}
plot(fit)
print(fit, method = "firstSEmax")
## Clustering Gap statistic ["clusGap"] from call:
## clusGap(x = t(zscoreTemp), FUNcluster = kmeans, K.max = 10)
## B=100 simulated reference sets, k = 1..10; spaceH0="scaledPCA"
## --> Number of clusters (method 'firstSEmax', SE.factor=1): 3
## logW E.logW gap SE.sim
## [1,] 9.097460 9.751194 0.6537344 0.005032655
## [2,] 8.987448 9.645416 0.6579687 0.003778555
## [3,] 8.939257 9.611883 0.6726255 0.004373447
## [4,] 8.906345 9.583541 0.6771966 0.004954919
## [5,] 8.888788 9.565856 0.6770678 0.004154478
## [6,] 8.870428 9.551132 0.6807041 0.003865681
## [7,] 8.854358 9.537842 0.6834838 0.003517341
## [8,] 8.844756 9.526233 0.6814774 0.004054859
## [9,] 8.833201 9.514832 0.6816309 0.003835592
## [10,] 8.822944 9.505262 0.6823179 0.003576149
# continuous variable
df.fig2 %>%
select(gr,
age_reported,
age_imputed) %>%
pivot_longer(cols = -gr) %>%
group_by(name) %>%
do(p = kruskal.test(formula = value~gr,
data = .)$p.value) %>%
ungroup() %>%
mutate(p = unlist(p))
## # A tibble: 2 x 2
## name p
## <chr> <dbl>
## 1 age_imputed 0.630
## 2 age_reported 0.357
# categorical variable
df.fig2 %>%
select(gr,
study_time_collected,
gender,
race,
ethnicity,
exposure_material_reported,
matrix,
study_accession,
Hispanic,
White,
Asian,
Black,
cell_type,
cohort,
featureSetName,
featureSetName2,
featureSetVendor,
vaccine,
vaccine_type,
adjuvant,
pathogen) %>%
sapply(FUN = as.character) %>%
as.data.frame() %>%
pivot_longer(cols = -gr) %>%
group_by(name) %>%
do(p = {
fit <- try(fisher.test(table(.$value, .$gr),
simulate.p.value = TRUE),
silent = TRUE)
if ("try-error" %in% class(fit)) {
NA
} else {
fit$p.value
}
}) %>%
ungroup() %>%
mutate(p = unlist(p), adj.p = p.adjust(p, method = "BH"))
## # A tibble: 20 x 3
## name p adj.p
## <chr> <dbl> <dbl>
## 1 adjuvant 0.394 0.702
## 2 Asian 0.781 0.781
## 3 Black 0.751 0.781
## 4 cell_type 0.106 0.474
## 5 cohort 0.109 0.474
## 6 ethnicity 0.166 0.474
## 7 exposure_material_reported 0.270 0.656
## 8 featureSetName 0.157 0.474
## 9 featureSetName2 0.163 0.474
## 10 featureSetVendor 0.0500 0.474
## 11 gender 0.329 0.659
## 12 Hispanic 0.532 0.710
## 13 matrix 0.295 0.656
## 14 pathogen 0.481 0.702
## 15 race 0.491 0.702
## 16 study_accession 0.651 0.779
## 17 study_time_collected 0.439 0.702
## 18 vaccine 0.662 0.779
## 19 vaccine_type 0.741 0.781
## 20 White 0.0255 0.474
Author: Slim Fourati
df.fig2sb <- df.fig2 %>%
filter(mu < 4.5 & mu > -4.5) %>% # remove outlier
filter(duplicated(participant_id) |
duplicated(participant_id, fromLast = TRUE))
df.fig2sb %>%
filter(study_accession %in% c("SDY80", "SDY180")) %>%
arrange(participant_id, study_time_collected) %>%
wilcox.test(formula = mu ~ study_time_collected,
data = .,
paired = TRUE)
##
## Wilcoxon signed rank test with continuity correction
##
## data: mu by study_time_collected
## V = 601, p-value = 0.002084
## alternative hypothesis: true location shift is not equal to 0
df.fig2sb %>%
filter(study_accession %in% c("SDY80", "SDY180")) %>%
select(participant_id, gr, mu, study_time_collected) %>%
pivot_wider(names_from = study_time_collected,
values_from = c(gr, mu)) %>%
mutate(stable = (`gr_-7` == `gr_0`) | abs(`mu_0`-`mu_-7`)< 1) %>%
group_by(stable) %>%
summarize(n = n())
## # A tibble: 2 x 2
## stable n
## <lgl> <int>
## 1 FALSE 20
## 2 TRUE 45
df.fig2sb %>%
filter(study_accession %in% c("SDY80", "SDY180")) %>%
select(participant_id, gr, mu, study_time_collected) %>%
pivot_wider(names_from = study_time_collected,
values_from = c(gr, mu)) %>%
mutate(stable = (`gr_-7` == `gr_0`) | abs(`mu_0`-`mu_-7`)< 1) %>%
select(participant_id, stable) %>%
merge(x = df.fig2sb, by = "participant_id", all.x = TRUE) -> df.fig2sb
ggplot(data = df.fig2sb,
mapping = aes(x = factor(study_time_collected), y = mu)) +
geom_point(mapping = aes(color = gr)) +
geom_line(mapping = aes(group = participant_id,
linetype = stable)) +
scale_color_discrete(name = "pre-vax cluster") +
labs(x = "study_time_collected",
y = "Inflammatory genesets (SLEA z-score)") +
theme_bw()
getSleaPDSubset.byGS <- function(geneset){
pd <- pData(sleaSet)
pd$mu <- colMeans(exprs(sleaSet)[geneset, ])
pd <- pd[ !is.na(pd$gr), ]
}
getSleaPDSubset.byGenes <- function(genes){
df <- exprs(esetNoResp)[genes, ] %>%
as.data.frame() %>%
rownames_to_column(var = "gene") %>%
pivot_longer(cols = -gene, names_to = "uid") %>%
merge(y = pData(sleaSet), by = "uid") %>%
filter(!is.na(gr))
}
plotSLEASubsetByTime.byGS <- function(pd, ylabel){
ggplot(data = pd,
mapping = aes(x = study_time_collected, y = mu, color = gr)) +
geom_line(mapping = aes(group = participant_id),
alpha = 0.1) +
geom_hline(yintercept = 0, color = "grey", size = 2) +
geom_smooth(method = "loess", formula = "y~x", color = "black") +
facet_wrap(facets = ~gr) +
scale_x_continuous(limits = c(0, 28)) +
labs(y = ylabel,
x = "Time after vaccination (days)") +
theme_bw() +
theme(axis.text = element_text(size = 15),
strip.text = element_text(size = 20),
axis.title.x = element_text(size = 20),
legend.pos = "none",
panel.grid = element_blank())
}
plotSLEASubsetByTime.byGene <- function(df){
q2DF <- df %>%
group_by(gene) %>%
summarize(q2 = median(value))
ggplot(data = df,
mapping = aes(x = study_time_collected, y = value, color = gr)) +
geom_line(mapping = aes(group = participant_id),
alpha = 0.1) +
geom_hline(data = q2DF,
mapping = aes(yintercept = q2),
color = "grey") +
geom_smooth(method = "loess", formula = "y~x", color = "black") +
facet_grid(facets = gene~gr) +
scale_x_continuous(limits = c(0, 28)) +
scale_y_continuous(limits = c(7, 10.5)) +
labs(y = "log2 intensity (ComBat)") +
theme_bw() +
theme(axis.text = element_text(size = 15),
strip.text = element_text(size = 20),
axis.title.x = element_text(size = 20),
legend.pos = "none",
panel.grid = element_blank())
}
Author: Slim Fourati
pd.fig3a <- getSleaPDSubset(inflamGS)
plotSLEASubsetByTime(pd.fig3a, "Inflammatory pathways (SLEA z-score)")
Author: Slim Fourati
inflamGenes <- c("IL1B", "NLRP3", "TNFAIP2")
df.figs3a <- getSleaPDSubset.byGenes(inflamGenes)
plotSLEASubsetByTime.byGene(df.figs3a)
Author: Slim Fourati
interferonGS <- c("HALLMARK_INTERFERON_ALPHA_RESPONSE",
"HALLMARK_INTERFERON_GAMMA_RESPONSE")
pd.figs3b <- getSleaPDSubset(interferonGS)
plotSLEASubsetByTime(pd.figs3b, "Interferon-stimulated genes (SLEA z-score)")
Author: Slim Fourati
isgGenes <- c("STAT2", "IRF9", "MX1")
df.figs3b2 <- getSleaPDSubset.byGenes(isgGenes)
plotSLEASubsetByTime.byGene(df.figs3b2)
Author: Slim Fourati
bcellGS <- c("enriched in B cells (I) (M47.0)",
"enriched in B cells (V) (M47.4)",
"plasma cells & B cells, immunoglobulins (M156.0)")
pd.fig3c <- getSleaPDSubset(bcellGS)
plotSLEASubsetByTime(pd.fig3c, "B cells (SLEA z-score)")
Author: Slim Fourati
bcellGenes <- c("CD79A", "CD79B", "BANK1")
df.figs3c <- getSleaPDSubset.byGenes(bcellGenes)
plotSLEASubsetByTime.byGene(df.figs3c)
Author: Slim Fourati
pd.fig4a <- pData(sleaSet)[sleaSet$study_time_collected <= 0, ]
df.fig4a <- pd.fig4a %>%
merge(y = distinct(select(pData(esetWithResp),
participant_id, MFC, MFC_p30))) %>%
group_by(study_accession) %>%
mutate(sMFC = scale(MFC),
vaccine = paste0(pathogen,".", vaccine_type),
vaccine = ifelse(pathogen %in% "Meningococcus",
yes = "Meningococcus",
no = vaccine))
ggplot(data = df.fig4a,
mapping = aes(x = factor(gr), y = sMFC)) +
geom_beeswarm(cex = 1.1, size = 0.75) +
geom_boxplot(outlier.color = "transparent",
fill = "transparent",
col = "red") +
labs(x = "Prevax inflammatory cluster") +
theme_bw()
kruskal.test(x = df.fig4a$sMFC, g = df.fig4a$gr)
##
## Kruskal-Wallis rank sum test
##
## data: df.fig4a$sMFC and df.fig4a$gr
## Kruskal-Wallis chi-squared = 7.0348, df = 2, p-value = 0.02968
pairwise.wilcox.test(x = df.fig4a$sMFC,
g = df.fig4a$gr,
p.adjust.method = "none") %>%
print()
##
## Pairwise comparisons using Wilcoxon rank sum test
##
## data: df.fig4a$sMFC and df.fig4a$gr
##
## low mid
## mid 0.1630 -
## high 0.0095 0.1676
##
## P value adjustment method: none
Author: Slim Fourati
df.fig4a %>%
filter(age_imputed < 50 & gr %in% c("low", "high")) %>%
group_by(vaccine) %>%
do(n = nrow(.),
estimate = {
fit <- try(wilcox.test(formula = sMFC~gr,
data = .,
conf.int = TRUE),
silent = TRUE)
if ("try-error" %in% class(fit)) {
NA
} else {
fit$estimate
}
},
p = {
fit <- try(wilcox.test(formula = sMFC~gr, data = ., conf.int = TRUE),
silent = TRUE)
if ("try-error" %in% class(fit)) {
NA
} else {
fit$p.value
}
}) %>%
mutate(n = unlist(n),
estimate = unlist(estimate),
p = unlist(p)) %>%
print()
## Source: local data frame [9 x 4]
## Groups: <by row>
##
## # A tibble: 9 x 4
## vaccine n estimate p
## <chr> <int> <dbl> <dbl>
## 1 Hepatitis B.Inactivated 15 -0.0000499 0.731
## 2 Influenza.Inactivated 359 -0.333 0.00589
## 3 Influenza.Live attenuated 21 -0.0000207 0.373
## 4 Meningococcus 22 -0.0579 0.794
## 5 Pneumococcus.Polysaccharide 6 0.695 0.0902
## 6 Smallpox.Live attenuated 2 0.784 1
## 7 Tuberculosis.Recombinant Viral Vector 8 0.121 1
## 8 Varicella Zoster.Live attenuated 13 -0.341 0.524
## 9 Yellow Fever.Live attenuated 62 -0.0000579 0.854
Author: Slim Fourati
plotTemp <- filter(df.fig4a, !(vaccine %in% c("Pneumococcus.Polysaccharide",
"Smallpox.Live attenuated",
"Tuberculosis.Recombinant Viral Vector")))
ggplot(data = filter(plotTemp, age_imputed < 50),
mapping = aes(x = factor(gr), y = sMFC)) +
geom_beeswarm() +
geom_boxplot(outlier.color = "transparent",
fill = "transparent",
col = "red") +
labs(x = "Prevax inflammatory cluster",
title = "age < 50") +
facet_wrap(facets = ~ifelse(test= vaccine %in% "Influenza.Inactivated",
yes = "Influenza.Inactivated",
no = "others")) +
theme_bw()
filter(plotTemp, vaccine %in% "Influenza.Inactivated") %>%
(function(p) { pairwise.wilcox.test(x = p$sMFC,
g = p$gr,
p.adjust.method = "none")}) %>%
print()
##
## Pairwise comparisons using Wilcoxon rank sum test
##
## data: p$sMFC and p$gr
##
## low mid
## mid 0.3138 -
## high 0.0059 0.0771
##
## P value adjustment method: none
filter(plotTemp, vaccine != "Influenza.Inactivated") %>%
(function(p) { pairwise.wilcox.test(x = p$sMFC,
g = p$gr,
p.adjust.method = "none")}) %>%
print()
##
## Pairwise comparisons using Wilcoxon rank sum test
##
## data: p$sMFC and p$gr
##
## low mid
## mid 0.14 -
## high 0.16 0.97
##
## P value adjustment method: none